## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:utils':
## 
##     View
## 
## Loading required package: ggplot2

The data set is “Seattle Police Department 911 Incident Response.”

People have called 911 at different times in the city of Seattle for various incidents. Each incident is categorized by the column “initial_type_subgroup” and described in more detail by the column “initial_type_description”.

The SPD has divided the city into areas given by the column “district_sector”, which are further subdivided into smaller zones given by the column “zone_beat”. Each incident also has reference numbers, stored in the columns “cad_event_number” and “cad_cdw_id”.

For each reported incident a latitude and longitude were recorded, from which we can get the approximate location where the incident/crime occurred. The variable “at_scene_time” gives us the time when the incident/crime was reported through 911 to the SPD.

#Download the 911 incident-response records from Seattle's Socrata open data
#endpoint and keep the parsed JSON in spd911
spd911 <- fromJSON(txt = "https://data.seattle.gov/resource/3k2p-39jp.json")
#Preview the first 6 rows of the data frame
head(x = spd911, n = 6L)
##   cad_event_number cad_cdw_id zone_beat
## 1      15000035997     581875        K3
## 2      15000035929     581910        Q1
## 3      15000035487     582157        K3
## 4      15000035390     582215        F2
## 5      15000035285     582277        L1
## 6      15000035205     582326        F1
##                           initial_type_description district_sector
## 1     AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY               K
## 2 NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)               Q
## 3                            FOOT - ELUDING POLICE               K
## 4                                    AUTO RECOVERY               F
## 5                                    AUTO RECOVERY               L
## 6                                    AUTO RECOVERY               F
##   initial_type_subgroup incident_location.needs_recoding
## 1           AUTO THEFTS                            FALSE
## 2  NARCOTICS COMPLAINTS                            FALSE
## 3 TRAFFIC RELATED CALLS                            FALSE
## 4           AUTO THEFTS                            FALSE
## 5           AUTO THEFTS                            FALSE
## 6           AUTO THEFTS                            FALSE
##   incident_location.longitude incident_location.latitude
## 1              -122.330271593               47.600875809
## 2               -122.37613941               47.636336049
## 3              -122.326350868               47.601708802
## 4              -122.363172642               47.525585666
## 5              -122.304248161               47.727498035
## 6              -122.369833395               47.546493546
##     hundred_block_location general_offense_number      longitude
## 1 3 AV S / S WASHINGTON ST              201535997 -122.330271593
## 2    20XX BLOCK OF 15 AV W              201535929 -122.376139410
## 3         6 AV / YESLER WY              201535487 -122.326350868
## 4   86XX BLOCK OF 24 AV SW              201535390 -122.363172642
## 5  135XX BLOCK OF 23 AV NE              201535285 -122.304248161
## 6   63XX BLOCK OF 29 AV SW              201535205 -122.369833395
##       latitude       at_scene_time    initial_type_group census_tract
## 1 47.600875809 2015-02-01T00:20:00       AUTO RECOVERIES    9200.2014
## 2 47.636336049 2015-01-31T23:12:00  NARCOTICS COMPLAINTS    5802.2003
## 3 47.601708802 2015-01-31T15:14:00 TRAFFIC RELATED CALLS    9200.1002
## 4 47.525585666 2015-01-31T13:36:00       AUTO RECOVERIES   11401.2005
## 5 47.727498035 2015-01-31T12:08:00       AUTO RECOVERIES     200.6017
## 6 47.546493546 2015-01-31T10:24:00       AUTO RECOVERIES   10700.4001
##   event_clearance_code event_clearance_subgroup event_clearance_group
## 1                 <NA>                     <NA>                  <NA>
## 2                 <NA>                     <NA>                  <NA>
## 3                 <NA>                     <NA>                  <NA>
## 4                 <NA>                     <NA>                  <NA>
## 5                 <NA>                     <NA>                  <NA>
## 6                 <NA>                     <NA>                  <NA>
##   event_clearance_description
## 1                        <NA>
## 2                        <NA>
## 3                        <NA>
## 4                        <NA>
## 5                        <NA>
## 6                        <NA>
#List the name of every column in the data frame
names(spd911)
##  [1] "cad_event_number"            "cad_cdw_id"                 
##  [3] "zone_beat"                   "initial_type_description"   
##  [5] "district_sector"             "initial_type_subgroup"      
##  [7] "incident_location"           "hundred_block_location"     
##  [9] "general_offense_number"      "longitude"                  
## [11] "latitude"                    "at_scene_time"              
## [13] "initial_type_group"          "census_tract"               
## [15] "event_clearance_code"        "event_clearance_subgroup"   
## [17] "event_clearance_group"       "event_clearance_description"
#Show the type and the first few values of every column in the data frame
str(object = spd911)
## 'data.frame':    1000 obs. of  18 variables:
##  $ cad_event_number           : chr  "15000035997" "15000035929" "15000035487" "15000035390" ...
##  $ cad_cdw_id                 : chr  "581875" "581910" "582157" "582215" ...
##  $ zone_beat                  : chr  "K3" "Q1" "K3" "F2" ...
##  $ initial_type_description   : chr  "AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY" "NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)" "FOOT - ELUDING POLICE" "AUTO RECOVERY" ...
##  $ district_sector            : chr  "K" "Q" "K" "F" ...
##  $ initial_type_subgroup      : chr  "AUTO THEFTS" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO THEFTS" ...
##  $ incident_location          :'data.frame': 1000 obs. of  3 variables:
##   ..$ needs_recoding: logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##   ..$ longitude     : chr  "-122.330271593" "-122.37613941" "-122.326350868" "-122.363172642" ...
##   ..$ latitude      : chr  "47.600875809" "47.636336049" "47.601708802" "47.525585666" ...
##  $ hundred_block_location     : chr  "3 AV S / S WASHINGTON ST" "20XX BLOCK OF 15 AV W" "6 AV / YESLER WY" "86XX BLOCK OF 24 AV SW" ...
##  $ general_offense_number     : chr  "201535997" "201535929" "201535487" "201535390" ...
##  $ longitude                  : chr  "-122.330271593" "-122.376139410" "-122.326350868" "-122.363172642" ...
##  $ latitude                   : chr  "47.600875809" "47.636336049" "47.601708802" "47.525585666" ...
##  $ at_scene_time              : chr  "2015-02-01T00:20:00" "2015-01-31T23:12:00" "2015-01-31T15:14:00" "2015-01-31T13:36:00" ...
##  $ initial_type_group         : chr  "AUTO RECOVERIES" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO RECOVERIES" ...
##  $ census_tract               : chr  "9200.2014" "5802.2003" "9200.1002" "11401.2005" ...
##  $ event_clearance_code       : chr  NA NA NA NA ...
##  $ event_clearance_subgroup   : chr  NA NA NA NA ...
##  $ event_clearance_group      : chr  NA NA NA NA ...
##  $ event_clearance_description: chr  NA NA NA NA ...

Since our analysis does not use all of the columns, it makes sense to get rid of the data that is not important for it. This does not mean that the data is of no use; the removed columns could be used in another analysis.

We first remove all such columns and then perform further data cleaning by casting certain columns to appropriate data types, which makes the analysis easier.

#Drop the columns this analysis never uses: the four event_clearance_* fields
#and the nested incident_location data frame (its longitude/latitude values are
#also available as top-level columns). Selecting by name keeps the step valid
#even if a column is already absent.
unused_columns <- c("event_clearance_code", "event_clearance_group",
                    "event_clearance_subgroup", "event_clearance_description",
                    "incident_location")
spd911 <- spd911[setdiff(names(spd911), unused_columns)]

#Everything arrives from the JSON feed as character, so cast the columns we
#work with to appropriate types.
#NOTE: as.integer() yields NA (with a warning) for values above
#.Machine$integer.max; the identifiers seen so far fit comfortably.
spd911$cad_cdw_id <- as.integer(spd911$cad_cdw_id)
spd911$general_offense_number <- as.integer(spd911$general_offense_number)
spd911$district_sector <- as.factor(spd911$district_sector)
spd911$longitude <- as.numeric(spd911$longitude)
spd911$latitude <- as.numeric(spd911$latitude)

#at_scene_time is ISO-8601 text such as "2015-02-01T00:20:00" with no UTC
#offset. Giving the format explicitly handles the "T" separator without any
#text substitution, and giving the time zone explicitly makes the parsed
#instants identical on every machine instead of depending on the session's
#local zone.
#NOTE(review): the values are taken to be Seattle local time - confirm against
#the dataset documentation.
spd911$at_scene_time <- as.POSIXct(spd911$at_scene_time,
                                   format = "%Y-%m-%dT%H:%M:%OS",
                                   tz = "America/Los_Angeles")

#Look at the structure of the cleaned data frame
str(spd911)
## 'data.frame':    1000 obs. of  13 variables:
##  $ cad_event_number        : chr  "15000035997" "15000035929" "15000035487" "15000035390" ...
##  $ cad_cdw_id              : int  581875 581910 582157 582215 582277 582326 582470 582573 582638 582653 ...
##  $ zone_beat               : chr  "K3" "Q1" "K3" "F2" ...
##  $ initial_type_description: chr  "AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY" "NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)" "FOOT - ELUDING POLICE" "AUTO RECOVERY" ...
##  $ district_sector         : Factor w/ 18 levels "99","B","C","D",..: 9 14 9 6 10 6 15 18 14 15 ...
##  $ initial_type_subgroup   : chr  "AUTO THEFTS" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO THEFTS" ...
##  $ hundred_block_location  : chr  "3 AV S / S WASHINGTON ST" "20XX BLOCK OF 15 AV W" "6 AV / YESLER WY" "86XX BLOCK OF 24 AV SW" ...
##  $ general_offense_number  : int  201535997 201535929 201535487 201535390 201535285 201535205 201534946 201534755 201534638 201534610 ...
##  $ longitude               : num  -122 -122 -122 -122 -122 ...
##  $ latitude                : num  47.6 47.6 47.6 47.5 47.7 ...
##  $ at_scene_time           : POSIXct, format: "2015-02-01 00:20:00" "2015-01-31 23:12:00" ...
##  $ initial_type_group      : chr  "AUTO RECOVERIES" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO RECOVERIES" ...
##  $ census_tract            : chr  "9200.2014" "5802.2003" "9200.1002" "11401.2005" ...

We want to plot a map of Seattle that shows the areas where the crimes occur or, rather, where the calls to 911 were made from. If we look at the data points on the map, we can see that although the calls are spread out all over Seattle, there is a high concentration of calls made from the center of the city, close to Capitol Hill and Downtown Seattle. Why the crime rate is so high in these areas is something we can look into further.

#Change the crimes data into a SpatialPointsDataFrame.
#coords holds one (longitude, latitude) row per record in spd911.
coords <- cbind(longitude = as.numeric(spd911$longitude),
                latitude = as.numeric(spd911$latitude))
#SpatialPointsDataFrame() refuses NA coordinates, so only records with both
#values present are turned into points
has_coords <- complete.cases(coords)
#Every column except the two coordinate columns becomes point attribute data;
#they are excluded by name so the step does not depend on column positions
attribute_columns <- setdiff(names(spd911), c("longitude", "latitude"))
crime_points <- SpatialPointsDataFrame(
  coords[has_coords, , drop = FALSE],
  spd911[has_coords, attribute_columns, drop = FALSE]
)
#Plot just the points where the crime occurred without a map of the city
plot(crime_points, pch = ".", col = "darkred", cex = 4)

#Create a map of seattle
map <- qmap('Seattle',zoom=11,maptype='hybrid')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Seattle&zoom=11&size=640x640&scale=2&maptype=hybrid&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Seattle&sensor=false
#Plot the crime points on top of the map that we created to show the locations where the crimes occurred.
#The aesthetics name the columns of `data` directly (no spd911$ inside aes()), so the layer always
#draws from the data frame it was given; na.rm = TRUE silently drops records without coordinates.
map + geom_point(data = spd911, aes(x = longitude, y = latitude),
                 color = "red", size = 3, alpha = 0.4, na.rm = TRUE)

We would also like to see what types of crimes occur in each district sector. To analyze the data further, we can look at how often each type of crime occurs in each sub-sector (zone beat); the zone beats are encoded by color.

#Create a list that contains all the district sectors in our data frame
district_sector_list <- list(as.character(unique(spd911$district_sector)))

#Widen the bottom margin once so the rotated category labels fit, and remember
#the previous graphical parameters so they can be restored after the loop
old_par <- par(mar = c(10, 3, 3, 1))

#Draw one stacked bar chart per district sector: one bar per incident
#subgroup, split into coloured segments by zone beat
for (sector in district_sector_list[[1]]) {

  #A missing sector cannot be matched with ==, so there is nothing to plot
  if (is.na(sector)) next

  #Create a smaller subset data frame for a specific district sector
  subset_sector <- subset(spd911, district_sector == sector)

  #Label calls without a zone beat explicitly so they still count in the totals
  beat <- as.character(subset_sector$zone_beat)
  beat[is.na(beat)] <- "(unknown)"

  #Cross-tabulate zone beat (rows) against incident subgroup (columns); each
  #column total is the number of calls of that subgroup in this sector
  calls_by_beat <- table(beat, subset_sector$initial_type_subgroup)
  #Skip sectors in which no record has a subgroup - barplot() cannot draw an
  #empty table
  if (length(calls_by_beat) == 0L) next

  #One colour per zone beat, shared by the bar segments and the legend so that
  #the legend really describes what is drawn
  beat_colors <- rainbow(nrow(calls_by_beat))

  #Create a barplot. The y axis covers at least 0-20 and grows when a bar is
  #taller, so no bar is cut off at the top
  barplot(calls_by_beat, col = beat_colors,
          ylim = c(0, max(20, colSums(calls_by_beat))),
          las = 2, cex.names = 0.5, xlab = "",
          main = paste("District sector", sector))
  legend("topright", fill = beat_colors, legend = rownames(calls_by_beat),
         cex = 0.5, title = "Zone Beats")
  mtext("Crimes Committed", side = 1, line = 9)
}

#Put the graphical parameters back the way they were before the loop
par(old_par)

One limitation of our analysis is that we have assumed that each call to 911 was made from, or close to, the location where the crime was committed.